---------------------------------------------------------------------------------------------------------------------------
      name:  <unnamed>
       log:  /Users/clotairemit.edu/Dropbox (MIT)/J-PAL Raskin Transition/10_Analysis&Results/Agent Experiment Analysis/03_
> Logs/20240426_clean_susenas_finance_mar19.log
  log type:  text
 opened on:  26 Apr 2024, 00:06:52

. 
. 
. 
. /*----------------------------------------------------*/
.                 /* Section: Blok 5 & 6 */
. /*----------------------------------------------------*/
. 
. ** Block 5 a
. 
.         u "$importable/2019_03_BLOK5A.dta", clear

. 
. rename renum RENUM

. 
. gen ex1 = 1

. 
. collapse (sum) ex , by(RENUM)

. 
. 
. tempfile blok5a

. save `blok5a'
file /var/folders/23/7_md6wbn6ns7m_ppp9yb96sh0000gp/T//S_18300.000001 saved as .dta format

. 
. ** Block 5 b
. 
. u "$importable/2019_03_BLOK5B.dta", clear

. 
. rename renum RENUM

. 
. gen nnagric_income = 0

. replace nnagric_income = b5bk6 if b5bk3 > 6 & b5bk3 < 99
(124,687 real changes made)

. // summ nnagric_income
. 
. gen foodaccom_income = 0

. replace foodaccom_income = b5bk6 if b5bk3 == 14
(17,033 real changes made)

. 
. gen trans_income = 0

. replace foodaccom_income = b5bk6 if b5bk3 == 13
(9,818 real changes made)

. 
. gen retail_income = 0

. replace retail_income = b5bk6 if b5bk3 == 12
(57,734 real changes made)

. 
. gen manuf_income = 0

. replace retail_income = b5bk6 if b5bk3 == 9
(208 real changes made)

. 
. collapse (sum) nnagric_income foodaccom_income ///
> trans_income retail_income manuf_income, by(RENUM)

. 
. // summ nnagric_income
. 
.         tempfile blok5b

.         save `blok5b'
file /var/folders/23/7_md6wbn6ns7m_ppp9yb96sh0000gp/T//S_18300.000002 saved as .dta format

. 
. 
. ** Block 6 and other parts of 5
. 
.         u "$importable/2019_03_BLOK5_RT.dta", clear

. 
.         rename renum RENUM

. 
.         rename b6r2k2 hh_busi_income

. 
.         rename b6r6 hh_savings

. 
.         merge 1:1 RENUM using `blok5b'

    Result                      Number of obs
    -----------------------------------------
    Not matched                       103,287
        from master                   103,287  (_merge==1)
        from using                          0  (_merge==2)

    Matched                           212,385  (_merge==3)
    -----------------------------------------

. 
.                                 // we can recode conditionality as 0 for non existant businesses incomes
. 
.         foreach var in nnagric_income foodaccom_income trans_income retail_income manuf_income {
  2.     replace `var' = 0 if hh_busi_income == 0 // or if _merge == 1 perfectly similar
  3. }
(103,287 real changes made)
(103,287 real changes made)
(103,287 real changes made)
(103,287 real changes made)
(103,287 real changes made)

.                 drop _m

. 
. keep *_income hh_* RENUM

. 
.         //      merge 1:1 RENUM using `blok5a'
.         //      drop _m
. 
.         tempfile blok6

.         save `blok6'
file /var/folders/23/7_md6wbn6ns7m_ppp9yb96sh0000gp/T//S_18300.000003 saved as .dta format

. 
. 
. 
. 
. /*----------------------------------------------------*/
.                 /* Section: Blok 42 */
. /*----------------------------------------------------*/
. 
. ** financial transaction fees
.         u "$importable/blok42_mar19.dta", clear

.         keep if KODE == 268 | KODE == 287
(12,841,565 observations deleted)

.         gen fin_trans_fees = B42K5 if KODE == 268
(44,634 missing values generated)

.         // summ fin_trans_fees
. 
. ** cell phone/smartphone spending (purchase, repairs, accessories)
.         gen cell_phone_spend = B42K5 if KODE == 287
(148,809 missing values generated)

.         // summ cell_phone_spend
. 
. ** collapse to HH level
.         collapse (sum) fin_trans_fees cell_phone_spend, by(RENUM)

.         // summ fin_trans_fees if fin_trans_fees > 0
.         // summ cell_phone_spend if cell_phone_spend > 0
. 
. * save
.         tempfile blok42

.         save `blok42'
file /var/folders/23/7_md6wbn6ns7m_ppp9yb96sh0000gp/T//S_18300.000004 saved as .dta format

. 
. /*----------------------------------------------------*/
.             /* Section: Individual Module */
. /*----------------------------------------------------*/
. 
. u "${importable}/kor19ind.dta", clear

. 
. ** KB card
. 
. /// tab  R1604
> gen KB_ind = R1604
(1,084,987 missing values generated)

. replace KB_ind = 0 if R1604 == 5
(85,262 real changes made)

. // summ KB_ind
. 
. 
. ** hours worked
.         /// tab  R701_A
>         //tab  R703
.         // summ  R707
.         // //tab  R707
.         gen hours_worked = R707
(643,253 missing values generated)

. 
. ** student
.         //tab  R701_B
.         gen student = R701_B == "B"

. 
. ** savings account
.         //tab  R808
.         recode R808 (5 = 0), gen(savings_ind)
(756,189 differences between R808 and savings_ind)

.         // gen savings_ind_miss = savings_ind == .
.         // tab R407 savings_ind_miss
.         // all missings are for children < 5
. 
. ** Online banking
.         //tab  R807_H
.         gen online_banking_ind = R807_H == "H"

.         //tab  online_banking_ind
. 
. ** Online purchasing
.         //tab  R807_E
.         gen online_buy_ind = R807_E == "E"

.         //tab  online_buy_ind
. 
. ** Online selling
.         //tab  R807_F
.         gen online_sell_ind = R807_F == "F"

.         //tab  online_sell_ind
. 
. ** female head of household
.         //tab  R403 R405 if R403 == 1
.         gen female_KRT = R403 == 1 & R405 == 2 if R403 == 1
(888,794 missing values generated)

.         // summ female_KRT
. 
. ** head of household is widow
.         //tab  female_KRT R404
.         gen widow = female_KRT == 1 & R404 == 4

.         //tab  widow
. 
. ** disruptive health issue
.         //tab  R1102
.         /// tab  R1103
>         gen health_complaints_ind = R1103 == 1

.         // summ health_complaints_ind
. 
. ** disabilities
.         * Vision
.         /// tab  R1002
>         gen vision_dis_sev = R1002 == 1 | R1002 == 2 if R1002 != .
(41,209 missing values generated)

.         // summ vision_dis_sev
.         gen vision_dis_mild = R1002 == 1 | R1002 == 2 | R1002 == 3 if R1002 != .
(41,209 missing values generated)

.         // summ vision_dis_mild
. 
.         * Hearing
.         //tab  R1003
.         gen hearing_dis_sev = R1003 == 5 | R1003 == 6 if R1003 != .
(41,209 missing values generated)

.         // summ hearing_dis_sev
.         gen hearing_dis_mild = R1003 == 5 | R1003 == 6 | R1003 == 7 if R1003 != .
(41,209 missing values generated)

.         // summ hearing_dis_mild
. 
.         * walking
.         //tab  R1004
.         gen walking_dis_sev = R1004 == 1 | R1004 == 2 if R1004 != .
(41,209 missing values generated)

.         // summ walking_dis_sev
.         gen walking_dis_mild = R1004 == 1 | R1004 == 2 | R1004 == 3 if R1004 != .
(41,209 missing values generated)

.         // summ walking_dis_mild
. 
.         * hand movement
.         //tab  R1005
.         gen hand_dis_sev = R1005 == 5 | R1005 == 6 if R1005 != .
(41,209 missing values generated)

.         // summ hand_dis_sev
.         gen hand_dis_mild = R1005 == 5 | R1005 == 6 | R1005 == 7 if R1005 != .
(41,209 missing values generated)

.         // summ hand_dis_mild
. 
.         * memory/concentration
.         //tab  R1006
.         gen memory_dis_sev = R1006 == 1 | R1006 == 2 if R1006 != .
(41,209 missing values generated)

.         // summ memory_dis_sev
.         gen memory_dis_mild = R1006 == 1 | R1006 == 2 | R1006 == 3 if R1006 != .
(41,209 missing values generated)

.         // summ memory_dis_mild
. 
.         * behavioral disorder
.         //tab  R1007
.         gen behav_dis_sev = R1007 == 5 | R1007 == 6 if R1007 != .
(41,209 missing values generated)

.         // summ behav_dis_sev
.         gen behav_dis_mild = R1007 == 5 | R1007 == 6 | R1007 == 7 if R1007 != .
(41,209 missing values generated)

.         // summ behav_dis_mild
. 
.         * Speech disorder
.         //tab  R1008
.         gen speech_dis_sev = R1008 == 1 | R1008 == 2 if R1008 != .
(41,209 missing values generated)

.         // summ speech_dis_sev
.         gen speech_dis_mild = R1008 == 1 | R1008 == 2 | R1008 == 3 if R1008 != .
(41,209 missing values generated)

.         // summ speech_dis_mild
. 
.         * Unable to take care of oneself
.         //tab  R1009
.         gen take_care_dis_sev = R1009 == 5 | R1009 == 6 if R1009 != .
(41,209 missing values generated)

.         // summ take_care_dis_sev
.         gen take_care_dis_mild = R1009 == 5 | R1009 == 6 | R1009 ==7 if R1009 != .
(41,209 missing values generated)

.         // summ take_care_dis_mild
. 
.         * any disability (individual)
.         egen tot_disability = rowtotal(*dis*)

.         gen any_disability = tot_disability >= 1

.         //tab  any_disability
. 
.         * any severe disability (individual)
.         egen tot_sev_disability = rowtotal(*dis_sev)

.         gen any_sev_disability = tot_sev_disability >= 1

.         //tab  any_sev_disability
.         drop tot_disability tot_sev_disability

. 
.         * head of household disability
.         gen hh_head = R403 == 1

.         gen any_dis_KRT = any_disability == 1 & hh_head == 1

.         //tab  any_dis_KRT if hh_head == 1
. 
.         * head of household severe disability
.         gen any_sev_dis_KRT = any_sev_disability == 1 & hh_head == 1

.         //tab  any_sev_dis_KRT if hh_head == 1
. 
. ** Head of household education levels
.         //tab  R613
.         // School classification descriptions from: https://mikrodata.bps.go.id/mikrodata/index.php/catalog/814/datafile/
> F1#page=F4&//tab =data-dictionary
. 
.         * Elementary school
.         // dummy if hh member has elementary edu or equivalent: (paket A, SDLB, SD, MI)
.         gen elementary = inlist(R613, 1, 2, 3, 4) if R403 == 1
(888,794 missing values generated)

. 
.         * Junior high school
.         // dummy if hh member has junior high edu or equivalent: (paket B, SMPLB, SMP, MTs)
.         gen junior_high = inlist(R613, 5, 6, 7, 8) if R403 == 1
(888,794 missing values generated)

. 
.         * High school
.         // dummy if hh member has high school edu or equivalent, or vocational school: (paket C, SMLB, SMA, MA, SMK, MAK)
.         gen high_school = inlist(R613, 9, 10, 11, 12, 13, 14) if R403 == 1
(888,794 missing values generated)

. 
.         * Tertiary edu
.         // dummy if hh member has tertiary edu: (D1/D2, D3, D4, S1, S2, S3)
.         gen tertiary = inlist(R613, 15, 16, 17, 18, 19, 20, 21) if R403 == 1
(888,794 missing values generated)

.         // summ elementary junior_high high_school tertiary
. 
. 
. *** Non HH head edu
.         * Elementary school
.         //tab  R615
.         // dummy if hh member has elementary edu or equivalent: (paket A, SDLB, SD, MI)
.         gen elementary_edu = inlist(R615, 2, 3, 4, 5)

. 
.         * Junior high school
.         // dummy if hh member has junior high edu or equivalent: (paket B, SMPLB, SMP, MTs)
.         gen junior_high_edu = inlist(R615, 6, 7, 8, 9)

. 
.         * High school
.         // dummy if hh member has high school edu or equivalent, or vocational school: (paket C, SMLB, SMA, MA, SMK, MAK)
.         gen high_school_edu = inlist(R615, 10, 11, 12, 13, 14, 15)

. 
.         * Tertiary edu
.         // dummy if hh member has tertiary edu: (D1/D2, D3, D4, S1, S2, S3)
.         gen tertiary_edu = inlist(R615, 16, 17, 18, 19, 20, 21, 22)

.         // summ elementary junior_high high_school tertiary
. 
. ** kids
.         // summ R407
.         gen kid = R407 < 18

.         // summ R407 if kid == 1
. 
. ** adults
.         gen adult = R407 >= 18

.         // summ adult
.         gen age = R407

. 
. ** eduation levels
.         gen high_edu = R613 >= 9 if R613 != .
(185,622 missing values generated)

. 
. ** gender
.         //tab  R405
.         gen female = R405 == 2

. 
. ** older vs. younger
.         gen under30 = R407 < 30

.         gen under40 = R407 < 40

. 
. ** cell phone
.         //tab  R802
.         recode R802 (5 = 0), gen(cell_phone)
(447,145 differences between R802 and cell_phone)

. 
. ** internet usage
.         //tab  R804
.         recode R804 (5 = 0), gen(internet_use)
(662,651 differences between R804 and internet_use)

. 
.         *  Recovering the numnber of small buisnesses in SUESENA 2019
. 
.         //tab  R705
.         gen smallbui_sus19_type1_ind = R705 == 1

.         replace smallbui_sus19_type1_ind = 0 if R704 <= 6 // removing farmers
(58,676 real changes made)

.         replace smallbui_sus19_type1_ind = 0 if R704 == 25
(88 real changes made)

.         // summ smallbui_sus19_type1_ind
. 
.         gen smallbui_sus19_type2_ind = R705 == 2

.         replace smallbui_sus19_type2_ind = 0 if R704 <= 6
(55,381 real changes made)

.         replace smallbui_sus19_type2_ind = 0 if R704 == 25
(9 real changes made)

.         // summ smallbui_sus19_type2_ind
. 
.         gen smallbui_sus19_type3_ind = R705 == 3

.         replace smallbui_sus19_type3_ind = 0 if R704 <= 6
(5,879 real changes made)

.         replace smallbui_sus19_type3_ind = 0 if R704 == 25
(23 real changes made)

.         // summ smallbui_sus19_type3_ind
. 
.         gen smallbui_sus19_all_ind = smallbui_sus19_type1_ind

.         replace smallbui_sus19_all_ind = 1 if smallbui_sus19_type2_ind == 1
(24,580 real changes made)

.         replace smallbui_sus19_all_ind = 1 if smallbui_sus19_type2_ind == 1
(0 real changes made)

.         // summ smallbui_sus19_all_ind
. 
. gen bus_nagr = (R704 > 6 & R704 < 999)

. gen bus_ret = (R704 == 12) // retail

. gen bus_food = (R704 == 14) // Food accom

. gen bus_manuf = (R704 == 8) // Manufacturing

. gen bus_other = (R704 == 7 ///
>         | R704 ==  9 | R704 ==  11 | R704 ==  15 | R704 ==  16 | R704 ==  17 ///
>         | R704 ==  18 | R704 ==  19 | R704 ==  20 | R704 ==  21 | R704 ==  22 ///
>         | R704 ==  23 | R704 ==  24 | R704 ==  25 | R704 ==  26) // Other

. gen bus_trans = (R704 == 13) // Transportaion

. gen bus_agr = (R704 == 2 | R704 == 3 | R704 == 4 | R704 == 5) // Agric not farming

. 
. 
. ** save individual level dataset (will come back to it at the end)
.         preserve

.         keep KB_ind smallbui_sus19_all_ind smallbui_sus19_type1_ind smallbui_sus19_type2_ind smallbui_sus19_type3_ind ///
>         RENUM R101 R102 R105 R40? bus_ret bus_nagr bus_food bus_manuf bus_other bus_trans bus_agr ///
>         savings_ind online* adult high_edu female under* cell_phone internet_use FWT *edu hours_worked age student kid wi
> dow

.         tempfile ind_dataset

.         qui save `ind_dataset'

.         restore

. 
. ** collapse total by household
.         collapse (sum) savings_ind online* female_KRT widow num_kids=kid elementary junior_high high_school tertiary ///
>         KB_ind smallbui_sus19_all_ind smallbui_sus19_type1_ind smallbui_sus19_type2_ind smallbui_sus19_type3_ind ///
>                 internet_use cell_phone health_complaints = health_complaints_ind *_dis_* , by(RENUM)

. 
. ** HH level variables
.         * internet use
.         // summ internet_use
.         gen internet_use_hh = internet_use > 0

.         drop internet_use

.         rename internet_use_hh internet_use

.         //tab  internet_use
. 
.         * cell phone ownership
.         // summ cell_phone
.         gen cell_phone_hh = cell_phone > 0

.         drop cell_phone

.         rename cell_phone_hh cell_phone

.         //tab  cell_phone
. 
.         * single woman with kids
.         gen single_mom = female_KRT == 1 & num_kids > 0

.         //tab  single_mom
. 
.         * Savings account = 1 if at least one hh member has savings account
.         * Savings account = 1 if at least one hh member has savings account  MISTAKE //!\\
.         gen savings_account = (savings_ind >= 1 & savings_ind < 99)

.         drop savings_ind

. 
.         * KB  = 1 if at least one hh member has KB
.         gen KB = KB_ind >= 1

.         drop KB_ind

. 
.         * Small buism variables = 1 if at least one hh member is part of such business
. 
. 
.         gen smallbui_sus19_all = (smallbui_sus19_all_ind >= 1) if smallbui_sus19_all_ind<.

. 
. 
. 
.         gen smallbui_sus19_type1 = (smallbui_sus19_type1_ind >= 1) if smallbui_sus19_type1_ind<.

.         drop smallbui_sus19_type1_ind

. 
.         gen smallbui_sus19_type2 = (smallbui_sus19_type2_ind >= 1) if smallbui_sus19_type2_ind<.

.         drop smallbui_sus19_type2_ind

. 
.         gen smallbui_sus19_type3 = (smallbui_sus19_type3_ind >= 1) if smallbui_sus19_type3_ind<.

.         drop smallbui_sus19_type3_ind

. 
. 
.         * online banking
.         gen online_banking = online_banking_ind >= 1

.         //tab  online_banking
.         drop online_banking_ind

. 
.         * online buying
.         gen online_buy = online_buy_ind >= 1

.         //tab  online_buy
.         drop online_buy_ind

. 
.         * online selling
.         gen online_sell = online_sell_ind >= 1

.         //tab  online_sell
.         drop online_sell_ind

. 
.         * anyone in hh with health complaint
.         gen health_complaints_hh = health_complaints > 0 & health_complaints != .

.         // summ health_complaints_hh
.         drop health_complaints

. 
.         * dummy for any severe/mild disability
.         // any disability at all
.         egen any_disability_hh = rowtotal(*_dis_*)

.         gen any_disability = any_disability_hh >= 1

.         // summ any_disability
.         drop any_disability_hh

. 
.         // severe disability
.         egen severe_disabilty_hh = rowtotal(*dis_sev)

.         gen severe_disabilty = severe_disabilty_hh >= 1

.         // summ severe_disabilty
.         drop severe_disabilty_hh

.         drop *_dis_mild* *_dis_sev*

. 
.         // summ
.         tempfile ind

.         save `ind'
file /var/folders/23/7_md6wbn6ns7m_ppp9yb96sh0000gp/T//S_18300.000007 saved as .dta format

. 
. /*----------------------------------------------------*/
.             /* Section: Merge Datasets */
. /*----------------------------------------------------*/
. 
. ** Merge SUSENAS modules into household module
.         u "$importable/kor19rt.dta", clear

. 
.         merge 1:1 RENUM using `ind'

    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                           315,672  (_merge==3)
    -----------------------------------------

.         drop _m

. 
.         merge 1:1 RENUM using `blok42'

    Result                      Number of obs
    -----------------------------------------
    Not matched                       151,173
        from master                   151,173  (_merge==1)
        from using                          0  (_merge==2)

    Matched                           164,499  (_merge==3)
    -----------------------------------------

.         drop _m

. 
.         merge 1:1 RENUM using `blok6'

    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                           315,672  (_merge==3)
    -----------------------------------------

.         drop _m

. 
. ** Create kabupaten identifier
.         tostring R101, gen(R101S)
R101S generated as str2

.         tostring R102, gen(R102S)
R102S generated as str2

.         gen KABU = R101S + "0" + R102S if length(R102S) == 1
(158,609 missing values generated)

.         replace KABU = R101S + R102S if length(R102S) > 1
(158,609 real changes made)

.         assert length(KABU) == 4

.         destring KABU, replace
KABU: all characters numeric; replaced as int

.         drop R101S R102S

. 
. ** Merge in kabupaten names (treatment and control districts identified by kabu name, so need name to merge)
.         preserve

.         import excel "${importable}/province_names.xlsx", firstrow clear
(4 vars, 514 obs)

.         rename KodeProvinsi R101

.         rename KodeKabupaten R102

.         destring R101, replace
R101 already numeric; no replace

.         destring R102, replace
R102: all characters numeric; replaced as byte

.         rename NamaKabupaten namakabupaten

.         rename NamaProvinsi provinsi

.         tempfile kabu_names

.         save `kabu_names'
file /var/folders/23/7_md6wbn6ns7m_ppp9yb96sh0000gp/T//S_18300.000009 saved as .dta format

.         restore

. 
.         merge  m:1 R101 R102 using "`kabu_names'", assert(3) nogen

    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                           315,672  
    -----------------------------------------

. 
. 
. *** Merge in treatment assignments
.         * load treatment and control status dataset
.         preserve

.         u "$randomization/treatments.dta", clear

.         keep namaprovinsi namakabupaten treated finalstratum

.         rename namaprovinsi provinsi

.         keep if treated != .
(75 observations deleted)

. 
.         * kota/kab variable to distinguish between districts with same name in same province
.         /* I identify which districts are experimental using ⁨05_Intervention⁩/01 Impact Evaluation⁩/20190523_Kab Kota Ph
> ase Expansion.xlsx,
>                         which distinguishes between kota and kab for same-name districts. I assign kab status to these
>                         districts to match the phase sheet. */
.         gen kab = 1 if inlist(namakabupaten, "BIMA", "GORONTALO", "PEKALONGAN", "SERANG")
(101 missing values generated)

. 
.         * rename Kabupaten Pontianak to Mempawah (name change in 2014)
.         // Note: Kota Pontianak is A/B district, kabupaten Mempawah/Pontianak is IE district
.         replace namakabupaten = "MEMPAWAH" if namakabupaten == "PONTIANAK"
(1 real change made)

.         tempfile treatments

.         save `treatments'
file /var/folders/23/7_md6wbn6ns7m_ppp9yb96sh0000gp/T//S_18300.00000b saved as .dta format

.         restore

. 
.         * abbreviate province names to match treatment assignment file
.         replace provinsi = "KEP. BANGKA BELITUNG" if provinsi == "KEPULAUAN BANGKA BELITUNG"
(3,707 real changes made)

.         replace provinsi = "D I YOGYAKARTA" if provinsi == "DI YOGYAKARTA"
(3,734 real changes made)

.         replace provinsi = "NTB" if provinsi == "NUSA TENGGARA BARAT"
(6,410 real changes made)

. 
.         * create kab variable for corresponding districts
.         gen kab = 1 if inlist(KABU, 5206, 7502, 3326, 3604)
(312,821 missing values generated)

. 
.         * merge treatment assignments
.         merge  m:1 provinsi namakabupaten kab using "`treatments'"

    Result                      Number of obs
    -----------------------------------------
    Not matched                       249,176
        from master                   249,176  (_merge==1)
        from using                          0  (_merge==2)

    Matched                            66,496  (_merge==3)
    -----------------------------------------

.         assert _m != 2

.         drop _m

. 
.         * holdout sample dummy
.         gen holdoutsample = finalstratum == 1000

. 
. 
. *** Merge in treatment phase
.         * load treatment phase dataset
.         preserve

.         u "$randomization/phases_T_n.dta", clear

.         append using "$randomization/phases_C_n.dta"
(variable namakabupaten was str20, now str23 to accommodate using data's values)

.         keep namaprovinsi namakabupaten treated phase*

.         rename namaprovinsi provinsi

.         keep if treated != .
(75 observations deleted)

. 
.         * differentiate between same-name districts
.         gen kab = 1 if inlist(namakabupaten, "BIMA", "GORONTALO", "PEKALONGAN", "SERANG")
(101 missing values generated)

. 
.         * rename Kabupaten Pontianak to Mempawah (name change in 2014)
.         // Note: Kota Pontianak is A/B district, kabupaten Mempawah/Pontianak is IE district
.         replace namakabupaten = "MEMPAWAH" if namakabupaten == "PONTIANAK"
(1 real change made)

. 
.         * replace phase = 0 if control & set to 0 if missing
.         foreach var of varlist phase? {
  2.                 replace `var' = 0 if treated == 0
  3.                 replace `var' = 0 if `var' == .
  4.         }
(63 real changes made)
(32 real changes made)
(63 real changes made)
(29 real changes made)
(63 real changes made)
(23 real changes made)

.         // bysort treated: summ phase?
. 
.         tempfile phases

.         qui save `phases'

.         restore

. 
.         * merge phase assignments
.         merge m:1 provinsi namakabupaten kab using "`phases'"

    Result                      Number of obs
    -----------------------------------------
    Not matched                       249,176
        from master                   249,176  (_merge==1)
        from using                          0  (_merge==2)

    Matched                            66,496  (_merge==3)
    -----------------------------------------

.         assert _m != 2

.         drop _m

.         drop kab

. 
. /*
> ** remove treated control districts from experimental sample
>         // treated controls identified from: "J-PAL Raskin Transition/05_Intervention/01 Impact Evaluation/20190523_Kab K
> ota Phase Expansion.xlsx"
>         replace treated = . if provinsi == "SUMATERA UTARA"     & namakabupaten == "LANGKAT"
>         replace treated = . if provinsi == "KALIMANTAN SELATAN" & namakabupaten == "TABALONG"
>         replace treated = . if provinsi == "JAWA TENGAH"        & namakabupaten == "PEKALONGAN"
> */
. 
. * dummy for experimental districts
.         gen experimental = treated != .

. 
. ** assert double named districts properly assigned
.         foreach dist of numlist 5272 3279 7571 3375 3673 {
  2.                 assert experimental == 0 if KABU == `dist'
  3.         }

.         foreach dist of numlist 6303 5206 7502 3326 3604 {
  2.                 assert experimental == 1 if KABU == `dist'
  3.         }

. 
. 
. /*----------------------------------------------------*/
.             /* Section: Household Module */
. /*----------------------------------------------------*/
. 
. *** Rastra variables
.         * Rastra receipt
.         // tab  R2101
.         gen raskin_receipt = R2101 == 1

.         // summ raskin_receipt
. 
. *** BPNT variables
.         * BPNT receipt
.         // tab  R2109
.         gen bpnt_receipt = R2109 == 1

.         // summ bpnt_receipt
. 
.         * BPNT use
.         // tab  R2110BK2
.         gen bpnt_use = ( R2110BK2 == 1 | R2110BK3 == 1 | R2110BK4 == 1 | R2110BK5 == 1)

. 
. 
.         * dummy for receiving any Rastra or BPNT
.         // all HHs which report Rastra receipt get some rice
.         gen receive_subsidy = bpnt_receipt == 1 | raskin_receipt == 1

.         // summ receive_subsidy
. 
.         * log per capita consumption (monthly)
.         // summ EXP_CAP
.         gen log_cons = ln(EXP_CAP)

. 
.         * per capita consumption, in levels (monthly)
.         gen cons = EXP_CAP

. 
.         * KKS card
.         // tab  R2105
.         gen kks = R2105 == 1 | R2105 == 2

. 
.         * pkh recipient
.         // tab  R2106
.         gen pkh_receipt = R2106 == 1

. 
.         * where to receive PKH assistance
.         // tab  R2107C
.         gen pkh_post = R2107C == 1 if pkh_receipt == 1
(274,873 missing values generated)

.         gen pkh_atm = R2107C == 2 if pkh_receipt == 1
(274,873 missing values generated)

.         gen pkh_bank = R2107C == 3 if pkh_receipt == 1
(274,873 missing values generated)

.         gen pkh_agent = R2107C == 4 if pkh_receipt == 1
(274,873 missing values generated)

.         gen pkh_other = R2107C == 5 if pkh_receipt == 1
(274,873 missing values generated)

.         // summ pkh*
. 
.         * online transactions
.         // summ online_buy online_sell
.         gen online_transact = online_buy + online_sell > 0

.         // tab  online_transact
. 
.         * any financial transactions fees
.         // summ fin_trans_fees
.         gen any_fin_trans_fee = fin_trans_fees > 0 & fin_trans_fees != .

.         drop fin_trans_fees

.         rename any_fin_trans_fee fin_trans_fees

.         // tab  fin_trans_fees
. 
.         * any cell phone spending
.         // summ cell_phone_spend
.         gen cell_phone_buy = cell_phone_spend > 0 & cell_phone_spend != .

.         // tab  cell_phone_buy
.         drop cell_phone_spend

. 
. ** credit variables
.         * receive credit from any source
.         // tab  R1901A
.         gen credit = R1901A == 1 | R1901B == 1 | R1901C == 1 | R1901D == 1 | R1901E == 1 | ///
>                 R1901F == 1 | R1901G == 1 | R1901H == 1 | R1901I == 1 | R1901J == 1

.         // tab  credit
. 
.         * receive credit from KUR
.         gen credit_KUR = R1901A == 1

.         // tab  credit_KUR
. 
.         * receive credit from commercial banks
.         gen credit_bank = R1901B == 1

.         // tab  credit_bank
. 
.         * receive credit from rural credit banks (BPR)
.         gen credit_BPR = R1901C == 1

.         // tab  credit_BPR
. 
.         * receive credit from coop
.         gen credit_coop = R1901D == 1

.         // tab  credit_coop
. 
.         * receive individual loans with interest
.         gen credit_loan = R1901E == 1

.         // tab  credit_loan
. 
.         * receive mortgage loan
.         gen credit_mtg = R1901F == 1

.         // tab  credit_mtg
. 
.         * receive credit from leasing companies
.         gen credit_lease = R1901G == 1

.         // tab  credit_lease
. 
.         * receive credit from KUBE
.         gen credit_KUBE = R1901H == 1

.         // tab  credit_KUBE
. 
.         * receive credit from village owned enterprises
.         gen credit_BUMDES = R1901I == 1

.         // tab  credit_BUMDES
. 
.         * receve credit from other source
.         gen credit_other = R1901J == 1

.         // tab  credit_other
. 
.         * receive social assistance from local govt
.         //tab  R2111
.         gen local_gov_aid = R2111 == 1

. 
.         * amount of social assistance from local govt received
.         // summ R2112A_* R2112B_1
.         egen local_aid_rp = rowtotal(R2112A_* R2112B_1)

.         // bysort local_gov_aid: summ local_aid_rp
. 
.         * HH size
.         // summ R301
.         gen num_in_hh = R301

. 
.         * Log HH size
.         gen log_num_in_hh = ln(num_in_hh)

. 
.         * fraction kids in hh
.         gen fraction_kids = num_kids / num_in_hh

.         // summ fraction_kids
. 
.         * HH head elementary edu or less
.         // summ elementary junior_high high_school tertiary
.         gen elementary_or_less = elementary

.         replace elementary_or_less = 1 if elementary == 0 & junior_high == 0 & high_school == 0 & tertiary == 0
(18,126 real changes made)

.         // summ elementary_or_less
. 
. 
. ** phk_other_location
. 
.         gen phk_other_location=pkh_bank
(274,873 missing values generated)

.         replace phk_other_location=1 if pkh_post==1
(2,060 real changes made)

.         replace phk_other_location=1 if pkh_atm==1
(15,600 real changes made)

.         replace phk_other_location=1 if pkh_other==1
(6,485 real changes made)

. 
. 
. /*----------------------------------------------------*/
.         /* Section: Drop & label variables  */
. /*----------------------------------------------------*/
. 
. * drop & rename
.         drop R17* R18* R19* R2* NUINFORT1 WI*

. 
. // * Save full-sample dataset
. //      save "$cleaned/finance/susenas_mar19_finance_full.dta", replace
. 
. * Keep experimental sample only
.         keep if experimental == 1
(249,176 observations deleted)

.         di _N
66496

. 
. ** Merge in readiness indices
.         * load readiness indices
.         preserve

.         import excel "$importable/20171016-Perluasan BPNT-V11.xlsx", clear firstrow sheet("514 KabKota")
(16 vars, 514 obs)

.         keep idkabu Indeks*

.         rename idkabu KABU

.         tempfile index

.         save `index'
file /var/folders/23/7_md6wbn6ns7m_ppp9yb96sh0000gp/T//S_18300.00000f saved as .dta format

.         restore

. 
.         * merge in index
.         merge m:1 KABU using `index'

    Result                      Number of obs
    -----------------------------------------
    Not matched                           409
        from master                         0  (_merge==1)
        from using                        409  (_merge==2)

    Matched                            66,496  (_merge==3)
    -----------------------------------------

.         assert _m != 1

.         drop if _m == 2
(409 observations deleted)

.         drop _m

. 
.         * clean index variables
.         foreach var of varlist Indeks* {
  2.                 replace `var' = "" if `var' == "n/a"
  3.                 destring `var', replace
  4.         }
(0 real changes made)
IndeksFasilitasKesiapanNonTu: all characters numeric; replaced as double
(0 real changes made)
IndeksInfrastuktur: all characters numeric; replaced as double
(0 real changes made)
IndeksGabungan: all characters numeric; replaced as double

.         rename IndeksFasilitasKesiapanNonTu facilities_index

.         rename IndeksInfrastuktur           infra_index

.         rename IndeksGabungan               composite_index

.         // summ *index
. 
.         * compute above median infrastructure dummy
.         preserve

.         duplicates drop KABU, force

Duplicates in terms of KABU

(66,391 observations deleted)

.         bysort treated: summ composite_index

---------------------------------------------------------------------------------------------------------------------------
-> treated = 0

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
composite_~x |         63    .4725055    .0464561   .3931999   .6309336

---------------------------------------------------------------------------------------------------------------------------
-> treated = 1

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
composite_~x |         42    .4790766    .0436685   .4074798   .5972435


.         qui summ composite_index, d

.         gen hi_comp_index = composite_index > `r(p50)' if composite_index != .

.         // summ hi_comp_index
.         keep KABU hi_comp_index

.         tempfile comp_index

.         save `comp_index'
file /var/folders/23/7_md6wbn6ns7m_ppp9yb96sh0000gp/T//S_18300.00000h saved as .dta format

.         restore

. 
.         merge m:1 KABU using `comp_index', assert(3) nogen

    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                            66,496  
    -----------------------------------------

.         // summ hi_comp_index
. 
. 
. /*----------------------------------------------------*/
.                 /* Section: Outliers */
. /*----------------------------------------------------*/
. 
. * variables to be checked for outliers
.         local continuous_vars = "local_aid_rp"

. 
. ** windsorize to 0.5 and 99.5 percentiles of continuous variables
.         foreach var of varlist `continuous_vars' {
  2.                 _pctile `var', p(0.5 99.5)
  3.                 qui gen `var'W = `var'
  4.                 replace `var'W = `r(r1)' if `var' < `r(r1)'
  5.                 replace `var'W = `r(r2)' if `var' > `r(r2)' & `var' != .
  6.                 // summ `var' `var'W
.         }
(0 real changes made)
(309 real changes made)

. 
. ** Remove outliers with z-scores of 12 and above
.         * Tag outliers with z-scores 12 and above
.                 foreach var of varlist `continuous_vars' {
  2.                   qui summ `var'
  3.                   qui gen `var'_z = abs((`var' - `r(mean)') / `r(sd)')
  4.                   // extremes `var'_z `var', n(20) high
.                   quietly count if `var'_z >= 12 & `var'_z != .
  5.                   di "`var' has `r(N)' values greater than 12 sd from the mean"
  6.                   qui gen `var'_tag = 1 if `var'_z >= 12 & `var'_z != .
  7.                 }
local_aid_rp has 65 values greater than 12 sd from the mean

. 
.         * set tagged values to missing
.         // for now: strictly set all 12> to missing
.         foreach var of varlist `continuous_vars' {
  2.                 quietly count if `var'_tag == 1
  3. 
.                 // if tagged vals in var
.                 if `r(N)' > 0 {
  4.                         // copy original variable
.                         quietly gen `var'_orig = `var'
  5. 
.                         // set tagged values to missing
.                   quietly replace `var' = . if `var'_tag == 1
  6.                         local num_miss = `r(N)'
  7.                         di "`num_miss' values in `var' set to missing"
  8.                         extremes `var'_z `var'_orig `var', n(`num_miss')  high
  9.                 }
 10.         }
65 values in local_aid_rp set to missing

  +-----------------------------------------+
  |   obs:   local_~z   local~ig   local_~p |
  |-----------------------------------------|
  | 22995.   12.14144    4800000          . |
  | 64124.   12.14144    4800000          . |
  | 62981.     12.498    4940000          . |
  | 46292.   12.65081    5000000          . |
  | 46505.   12.65081    5000000          . |
  |-----------------------------------------|
  | 55331.   12.65081    5000000          . |
  | 62601.   12.65081    5000000          . |
  | 62771.   12.65081    5000000          . |
  | 66406.   12.65081    5000000          . |
  | 46437.   12.90549    5100000          . |
  |-----------------------------------------|
  | 26815.   13.16017    5200000          . |
  |  1580.   13.66953    5400000          . |
  |  4468.   13.66953    5400000          . |
  | 50395.   13.66953    5400000          . |
  |  1600.   13.79688    5450000          . |
  |-----------------------------------------|
  | 60537.   13.92422    5500000          . |
  | 61339.   14.43358    5700000          . |
  | 32239.   14.68826    5800000          . |
  | 38240.   14.94294    5900000          . |
  | 29457.   15.19763    6000000          . |
  |-----------------------------------------|
  | 29808.   15.19763    6000000          . |
  | 39268.   15.19763    6000000          . |
  | 39956.   15.19763    6000000          . |
  | 44291.   15.19763    6000000          . |
  | 59271.   15.19763    6000000          . |
  |-----------------------------------------|
  |  6356.   16.47104    6500000          . |
  | 60280.   16.72572    6600000          . |
  | 44092.   17.74444    7000000          . |
  | 46557.   17.74444    7000000          . |
  | 46698.   18.25381    7200000          . |
  |-----------------------------------------|
  | 12196.   18.50849    7300000          . |
  | 12572.   19.27254    7600000          . |
  | 39122.   20.29126    8000000          . |
  | 62904.   20.29126    8000000          . |
  | 62130.   22.83808    9000000          . |
  |-----------------------------------------|
  | 62749.   22.83808    9000000          . |
  | 62609.   23.47478    9250000          . |
  |  4886.    25.3849   1.00e+07          . |
  |  7663.    25.3849   1.00e+07          . |
  | 28376.    25.3849   1.00e+07          . |
  |-----------------------------------------|
  | 39075.    25.3849   1.00e+07          . |
  | 39193.    25.3849   1.00e+07          . |
  | 39355.    25.3849   1.00e+07          . |
  | 44056.    25.3849   1.00e+07          . |
  | 44239.    25.3849   1.00e+07          . |
  |-----------------------------------------|
  | 44515.    25.3849   1.00e+07          . |
  | 49661.    25.3849   1.00e+07          . |
  | 50360.    25.3849   1.00e+07          . |
  |  1915.   29.20513   1.15e+07          . |
  | 28782.   30.47854   1.20e+07          . |
  |-----------------------------------------|
  | 22643.   31.75195   1.25e+07          . |
  | 15903.   33.02536   1.30e+07          . |
  | 39132.   33.02536   1.30e+07          . |
  |    31.   38.11899   1.50e+07          . |
  | 21565.   38.11899   1.50e+07          . |
  |-----------------------------------------|
  | 26434.   38.11899   1.50e+07          . |
  | 26479.   38.11899   1.50e+07          . |
  | 43336.   38.11899   1.50e+07          . |
  | 62306.   38.11899   1.50e+07          . |
  | 62402.   38.11899   1.50e+07          . |
  |-----------------------------------------|
  | 62424.   38.11899   1.50e+07          . |
  | 62471.   38.11899   1.50e+07          . |
  | 62859.   38.11899   1.50e+07          . |
  | 63186.   38.11899   1.50e+07          . |
  | 41124.   76.32127   3.00e+07          . |
  +-----------------------------------------+

. 
.         drop *z *tag

.         qui ds *orig

.         di "Original versions of vars with outliers removed: `r(varlist)'"
Original versions of vars with outliers removed: local_aid_rp_orig

. 
.         * label new variables
.         foreach var of varlist *W {
  2.                 local varname = substr("`var'", 1, length("`var'") - 1)
  3.                 local label : var label `varname'
  4.                 la var `var' "`label' (Winsorized)"
  5.         }

.         foreach var of varlist *_orig {
  2.                 local varname = substr("`var'", 1, length("`var'") - 5)
  3.                 local label : var label `varname'
  4.                 la var `var' "`label' (Raw)"
  5.                 local labelnew : var label `var'
  6.         }

. 
. 
. /*----------------------------------------------------*/
.           /* Section: Merge in baseline DV */
. /*----------------------------------------------------*/
. 
. ** Add in baseline (march 2018) outcome variables
.         preserve

.         u "$cleaned/finance/susenas_mar18_finance_hh_full.dta", clear

.         drop *orig

.         rename smallbui_sus19_all TALL

. 
.         * collapse outcome variables by district by urban/rural areas
.         rename R105 urban

.         collapse (mean) internet_use cell_phone* local_gov_aid local_aid_rp* credit* savings_account = any_savings kks pk
> h* fin_trans_fees      [aw = FWT], by(KABU urban)

. 
. 
.         rename *              *_mar18

.         rename KABU_mar18     KABU

.         rename urban_mar18    urban

.         // summ
.         tempfile mar_2018

.         save `mar_2018'
file /var/folders/23/7_md6wbn6ns7m_ppp9yb96sh0000gp/T//S_18300.00000j saved as .dta format

.         restore

. 
.         * merge in baseline DVs
.         rename R105 urban

.         merge m:1 KABU urban using `mar_2018'

    Result                      Number of obs
    -----------------------------------------
    Not matched                           752
        from master                         0  (_merge==1)
        from using                        752  (_merge==2)

    Matched                            66,496  (_merge==3)
    -----------------------------------------

.         assert _m != 1

.         keep if _m == 3
(752 observations deleted)

.         drop _m

. 
. ** Save experimental HHs dataset
.         recode urban (2=0)
(45,969 changes made to urban)

.         // summ
.         save "$cleaned/finance/susenas_mar19_finance_treats.dta", replace
file /Users/clotairemit.edu/Dropbox (MIT)/J-PAL Raskin Transition/10_Analysis&Results/Agent Experiment
    Analysis/01_Data/cleaned/finance/susenas_mar19_finance_treats.dta saved

.         // u "$cleaned/finance/susenas_mar19_finance_treats.dta", clear
. 
. 
. /*----------------------------------------------------*/
.             /* Section: Individual dataset */
. /*----------------------------------------------------*/
. 
. ** Create kabupaten identifier
.         u `ind_dataset', clear

.         tostring R101, gen(R101S)
R101S generated as str2

.         tostring R102, gen(R102S)
R102S generated as str2

.         gen KABU = R101S + "0" + R102S if length(R102S) == 1
(600,191 missing values generated)

.         replace KABU = R101S + R102S if length(R102S) > 1
(600,191 real changes made)

.         assert length(KABU) == 4

.         destring KABU, replace
KABU: all characters numeric; replaced as int

.         drop R101S R102S

. 
. ** Merge in kabupaten/province names
.         preserve

.         import excel "${importable}/province_names.xlsx", firstrow clear
(4 vars, 514 obs)

.         rename KodeProvinsi R101

.         rename KodeKabupaten R102

.         destring R101, replace
R101 already numeric; no replace

.         destring R102, replace
R102: all characters numeric; replaced as byte

.         rename NamaKabupaten namakabupaten

.         rename NamaProvinsi provinsi

.         tempfile kabu_names

.         save `kabu_names'
file /var/folders/23/7_md6wbn6ns7m_ppp9yb96sh0000gp/T//S_18300.00000l saved as .dta format

.         restore

. 
.         merge  m:1 R101 R102 using "`kabu_names'", assert(3) nogen

    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                         1,204,466  
    -----------------------------------------

. 
. ** merge in treatment and strata
.         preserve

.         u "$cleaned/finance/susenas_mar19_finance_treats.dta", clear

.         duplicates drop KABU, force

Duplicates in terms of KABU

(66,391 observations deleted)

.         keep KABU treated experimental finalstratum phase*

.         tempfile treatments

.         qui save `treatments'

.         restore

. 
.         merge m:1 KABU using `treatments'

    Result                      Number of obs
    -----------------------------------------
    Not matched                       953,352
        from master                   953,352  (_merge==1)
        from using                          0  (_merge==2)

    Matched                           251,114  (_merge==3)
    -----------------------------------------

.         assert _m != 2

.         drop _m

.         keep if experimental == 1
(953,352 observations deleted)

. 
. ** merge in march 2018 baseline controls
.         merge m:1 R101 R102 R105 using "$cleaned/finance/susenas_mar18_finance_ind.dta"

    Result                      Number of obs
    -----------------------------------------
    Not matched                           752
        from master                         0  (_merge==1)
        from using                        752  (_merge==2)

    Matched                           251,114  (_merge==3)
    -----------------------------------------

.         assert _m != 1

.         drop if _m == 2
(752 observations deleted)

.         drop _m

.         // summ *mar18
. 
. * save
.         rename *_ind* **

.         recode R105 (2=0), gen(urban)
(172,352 differences between R105 and urban)

.         save "$cleaned/finance/susenas_mar19_finance_ind.dta", replace
file /Users/clotairemit.edu/Dropbox (MIT)/J-PAL Raskin Transition/10_Analysis&Results/Agent Experiment
    Analysis/01_Data/cleaned/finance/susenas_mar19_finance_ind.dta saved

.         // u "$cleaned/finance/susenas_mar19_finance_ind.dta", clear
. 
. // DONE
. cap log close
